In [1]:
# Imports
import os

import graphistry
import numpy as np
import pandas as pd
from py2neo import Graph, Path
# Read the Graphistry API key from the environment rather than committing it to the notebook.
# NOTE(review): the key that used to be hardcoded on this line has been exposed in the
# notebook's history and should be rotated.
# Raises KeyError if GRAPHISTRY_API_KEY is not set, so a missing key fails loudly here.
graphistry.register(key=os.environ['GRAPHISTRY_API_KEY'])
============================
In [41]:
# Static - Connect to the database (password redacted; supply credentials via the environment)
# graph = Graph('http://neo4j:<password>@ec2-34-212-133-23.us-west-2.compute.amazonaws.com:7474')
In [42]:
# tx = graph.cypher.begin()
# for name in ["Alice", "Bob", "Carol"]:
# tx.append("CREATE (person:Person {name:{name}}) RETURN person", name=name)
# alice, bob, carol = [result.one for result in tx.commit()]
# friends = Path(alice, "KNOWS", bob, "KNOWS", carol)
# graph.create(friends)
In [43]:
# graph.data("MATCH (a:address) --> (b:incoming_payment) --> (c:transaction) RETURN LIMIT 25")
In [44]:
# rows = pd.read_csv('transactions.csv')[:1000]
# graphistry.hypergraph(rows)['graph'].plot()
In [45]:
# Scratch cell carried over from a citation-graph analysis.
# NOTE(review): `df`, `citations`, `arxiv_metadata` and `plotter` are not assigned in any
# cell shown in this notebook (the assignments below are commented out), so this cell
# raises NameError on a fresh kernel — define them earlier or remove the cell.
# Retrieve all the paper metadata
# btc_metadata = pd.read_sql_query('SELECT * FROM Papers', conn)
# df = pd.DataFrame(graph.data("MATCH (n:transaction) Return n LIMIT 25"))
df.head()
citations.info()

# Attach the metadata of both endpoints to every citation edge; columns coming from the
# second join are disambiguated with the `_from` / `_to` suffixes.
metadata_merge = (
    citations
    .merge(arxiv_metadata, left_on='source', right_on='id')
    .merge(arxiv_metadata, left_on='target', right_on='id', suffixes=('_from', '_to'))
)
metadata_merge.info()
citations.head()

plotter = plotter.bind(edge_weight="label")
ig = plotter.pandas2igraph(metadata_merge)

# Look up the metadata row for each igraph vertex (left join keeps the vertex order)
vertex_metadata = pd.DataFrame(ig.vs['nodeid'], columns=['id']).merge(arxiv_metadata, how='left', on='id')
# Copy the metadata columns onto the vertices, one vertex attribute per column
for attribute in ('primary_subject', 'color', 'title', 'year', 'month', 'category'):
    ig.vs[attribute] = vertex_metadata[attribute]

# Size points by in-degree and color them by the `color` vertex attribute
ig.vs['in_degree'] = ig.indegree()
plotter.bind(point_size='in_degree', point_color='color').plot(ig)
In [ ]:
In [2]:
# Load the transaction log and parse the Date column, which is read as epoch milliseconds
transactions = pd.read_csv('transactions.csv').assign(
    Date=lambda frame: pd.to_datetime(frame['Date'], unit='ms')
)
# Preview the first three rows
transactions.head(3)
Out[2]:
In [3]:
# Show the column names of the transactions table as a plain list
column_names = transactions.columns.tolist()
print('DataFrame headers: {}'.format(column_names))
In [4]:
# Name of the last column in the transactions table
transactions.columns[-1]
Out[4]:
In [5]:
# List the distinct values present in the isTainted column.
# Per the original note, 'taint' is weighted as 5 — TODO confirm against the cell output.
transactions['isTainted'].unique()
Out[5]:
In [6]:
# for item in transactions[transactions['isTainted'] == 5].isTainted:
# item = 10
In [7]:
# for column in transactions.columns[-1]:
# transactions[transactions == 5] = 10
In [8]:
# (row count, column count) of the transactions table
transactions.shape
Out[8]:
In [9]:
# Summary of the transactions table: column dtypes, non-null counts and memory usage
transactions.info()
In [10]:
# transaction window: earliest and latest transaction dates (sorted once, then sliced)
dates_sorted = transactions['Date'].sort_values()
print(dates_sorted.iloc[:1], '\n')
print(dates_sorted.iloc[-1:])
Task: Spot the embezzling
In [11]:
# Base plotter: every transaction row is an edge from its Source wallet to its Destination wallet
edge_bindings = {'source': 'Source', 'destination': 'Destination'}
g = graphistry.edges(transactions).bind(**edge_bindings)
In [12]:
# Render the Source -> Destination transaction graph held in `g`
g.plot()
Out[12]:
In [13]:
# Compute how much wallets received in new df 'wallet_in' (one row per Destination wallet):
#   isTaintedWallet - 1 if the wallet's incoming isTainted values sum to more than 0, else 0
#   Amount $        - total amount received
# The aggregation is named by string ('sum'): passing np.sum to groupby.agg is deprecated
# in recent pandas and emits a FutureWarning, while the string form keeps the same result.
wallet_in = (
    transactions
    .groupby('Destination')
    .agg({'isTainted': lambda taint: 1 if taint.sum() > 0 else 0, 'Amount $': 'sum'})
    .reset_index()
    # Destination -> wallet, isTainted -> isTaintedWallet
    .rename(columns={'Destination': 'wallet', 'isTainted': 'isTaintedWallet'})
)
# Not all wallets received money; tag the ones that did
wallet_in['Receivables'] = True
wallet_in.head(3)
Out[13]:
In [14]:
# Distinct isTaintedWallet values among receiving wallets
wallet_in['isTaintedWallet'].unique()
Out[14]:
In [15]:
# Compute how much wallets sent in new df 'wallet_out' (one row per Source wallet):
#   isTaintedWallet - sum of the isTainted values over the wallet's outgoing transactions
#   Amount $        - total amount sent
# 'Amount $' previously used np.max, which reports only the single largest outgoing
# transaction; it is summed here to match the stated purpose and the wallet_in aggregation.
# Aggregations are named by string because passing NumPy callables to groupby.agg is
# deprecated in recent pandas.
wallet_out = (
    transactions
    .groupby('Source')
    .agg({'isTainted': 'sum', 'Amount $': 'sum'})
    .reset_index()
    # Source -> wallet, isTainted -> isTaintedWallet
    .rename(columns={'Source': 'wallet', 'isTainted': 'isTaintedWallet'})
)
# Not all wallets sent money; tag the ones that did
wallet_out['Payables'] = True
wallet_out.head(3)
Out[15]:
In [16]:
# Distinct isTaintedWallet values among sending wallets
wallet_out['isTaintedWallet'].unique()
Out[16]:
In [17]:
# Join Data
# Join on the wallet id only. Without `on=`, pd.merge joins on every column the two frames
# share (wallet, isTaintedWallet, Amount $), so a wallet that both sent and received ends up
# as two separate rows and the node table carries duplicate wallet ids.
# The per-side values are kept in suffixed columns: isTaintedWallet_in / isTaintedWallet_out
# and 'Amount $_in' / 'Amount $_out'.
wallets = pd.merge(wallet_in, wallet_out, on='wallet', how='outer', suffixes=('_in', '_out'))

# A wallet missing from one side has NaN in that side's flag; turn the flags into real booleans
wallets['Receivables'] = wallets['Receivables'].notna()
wallets['Payables'] = wallets['Payables'].notna()

# Single taint value per wallet: the larger of the incoming and outgoing values
# (NaN from the missing side is skipped; 0 still means "not tainted")
wallets['isTaintedWallet'] = (
    wallets[['isTaintedWallet_in', 'isTaintedWallet_out']].max(axis=1).astype(int)
)

# Count wallets that appear on exactly one side, which is what the label states
one_sided = int((wallets['Receivables'] != wallets['Payables']).sum())
print('# Wallets only sent or only received', one_sided)
wallets.head(3)
Out[17]:
In [18]:
# Work on a copy: a plain `tmp = wallets` only aliases the frame, so recoloring
# tmp['isTaintedWallet'] would also overwrite the taint values stored in `wallets`.
tmp = wallets.copy()
In [19]:
# colors at: http://staging.graphistry.com/docs/legacy/api/0.9.2/palette.html#Paired
def convert_to_colors(value, clean_color=36005, tainted_color=42005):
    """Map a wallet taint value to a palette color code.

    Parameters
    ----------
    value : number
        Taint value of a wallet; exactly 0 means untainted.
    clean_color : int, optional
        Code returned when ``value == 0`` (default 36005, labelled magenta in
        the original notebook).
    tainted_color : int, optional
        Code returned for every other value (default 42005, labelled orange in
        the original notebook).

    Returns
    -------
    int
        ``clean_color`` or ``tainted_color``.

    Notes
    -----
    Anything that does not compare equal to 0 — including NaN — is colored as
    tainted.
    """
    return clean_color if value == 0 else tainted_color
# Replace each wallet's taint value with its color code, element by element
taint_values = tmp['isTaintedWallet']
tmp['isTaintedWallet'] = taint_values.map(convert_to_colors)
In [20]:
# Distinct values left in isTaintedWallet after the color mapping
tmp['isTaintedWallet'].unique()
Out[20]:
In [24]:
# Attach the wallet table as nodes (keyed by 'wallet', colored by 'isTaintedWallet') and render
wallet_plot = g.nodes(tmp).bind(node='wallet', point_color='isTaintedWallet')
wallet_plot.plot()
Out[24]:
Plain-no-audio.mov
In [ ]: